*This file produces many intermediate files which are referenced in the main
*	code files. The code is separated into 13 sections (A-M), where each section defines
*	the files which are created in that section.
********************************************************************************
*DEFINE DIRECTORIES
local home CHILD
local main CHILD/JPE
local logs CHILD/JPE/logs
local data CHILD/JPE/data
local results CHILD/JPE/results
local network NETWORK
********************************************************************************

*******************************************************************
*******************************************************************
*SECTION A: basis_sample.dta, demographics_check.dta, final_final.dta
*******************************************************************

*carveout_check includes only those folks that have claims; carveout_check2
*	includes everyone; even if they don't have claims (N=18,514,373 unique patientid's)
cd "`home'"
import delimited comparison_carveout_check2.txt, varnames(1) clear
generate out = months_to_first<0 | months_after_last<0
replace invalid = 1 if invalid>1
keep patientid earliest latest out invalid
save coverage_check.dta, replace

*file with most commonly reported zip per person (N=18,514,373 unique patientid's)
cd "`home'"
import delimited comparison_carveout_3.txt, varnames(1) clear
by patientid, s: keep if _n==1
save zip_check.dta, replace

*file with comparison sample demographics (N=18,514,373 unique patientid's)
cd "`home'"
import delimited comparison_demog.txt, varnames(1) clear
drop patientid0
generate date = date(bday,"YMD")
format date %td
generate female = memb_gender_cd=="F"
drop bday memb_gender
duplicates drop

*1: drop anyone with inconsistent demographics within patientid
*by patientid, s: generate count=_N
*drop if count>1
*drop count

by patientid, s: generate count=_N
generate inconsistent_demo = count>1
drop count
by patientid, s: keep if _n==1

rename date birthday
save demographics_check.dta, replace

*combine three files*********************************
*this includes only those with consistent demographic info across patientid0's within patientid
cd "`home'"
use demographics_check.dta, clear

merge 1:1 patientid using coverage_check
keep if _merge==3
drop _merge

merge 1:1 patientid using zip_check
keep if _merge==3
drop _merge

generate early = date(earliest, "YMD")
drop earliest
generate earliest = floor((mofd(early)-mofd(birthday))/12)

generate late = date(latest, "YMD")
drop latest
generate latest = floor((mofd(late)-mofd(birthday))/12)

*merge 1:1 patientid using regression_sample.dta
*generate in_sample = _merge==3
*drop _merge

save sample_origin.dta, replace

**************construct the sample
cd "`home'"
use sample_origin.dta, clear

count

*apply restrictions

****RESTRICTION ONE
*4: seen before 11 and between 10-18
keep if earliest<=10 & latest>=10
drop earliest latest

*2: all claims within coverage period
keep if out==0
drop out

*****RESTRICTION TWO
*3: no carveout during coverage
keep if invalid==0
drop invalid

*****RESTRICTION THREEss
*5: valid geographic information + in ACS 2017
rename member_zip zip

*convert to ZCTAs
preserve
cd "`network'"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
destring zip ZCTA, replace
keep zip ZCTA
tempfile temp
save `temp', replace
restore

replace zip = "" if real(zip)==.
destring zip, replace
merge m:1 zip using `temp'
drop if _merge==2
drop _merge

*get rid of inconsistent demographics
drop if inconsistent_demo==1

*merge in the sample data
cd "`home'"
merge 1:1 patientid using
 regression_sample.dta
drop if _merge==2
drop _merge
drop i_therapy- adhd
destring zcta_kid, replace
replace ZCTA = zcta_kid if zcta_kid!=.
replace zcta_kid = ZCTA
drop ZCTA

*merge in census
preserve
cd "`home'"
use zcta_acs_2017.dta, clear
destring ZCTA, replace
rename ZCTA zcta_kid
tempfile temp
save `temp', replacecount

restore

merge m:1 zcta_kid using `temp'
drop if _merge==2
drop _merge

*merge on add on census
preserve
cd "`home'"
use zcta_add_on.dta, clear
rename ZCTA5 zcta_kid
destring zcta_kid, replace
tempfile temp
save `temp', replace
restore

merge m:1 zcta_kid using `temp'
drop if _merge==2
drop _merge

*drop if missing ZCTA stats
generate missing = 0
foreach var of varlist black asian native other hispanic less_high some_college college college_plus med_hh_income single_parent owner_occupied poverty {
replace missing=1 if `var'==.
}
drop if missing==1
drop med_hh_income-missing

*save sample
cd "`home'"
save basis_sample.dta, replace

*save comparison sample list
preserve
keep patientid zcta_kid
save comparison_sample.dta, replace
restore

*parse variables
cd  "`home'"
use patientid earliest invalid out latest zcta_kid in_sample member_zip using sample_origin.dta, clear

*merge in treatment info
preserve
use regression_sample.dta, clear
keep patientid treated drug_only therapy_only both tca benzo badfda
tempfile temp
save `temp', replace
restore
merge 1:1 patientid using `temp'
drop _merge

foreach var of varlist treated-badfda {
replace `var' = 0 if `var'==.
}

merge 1:1 patientid using mh_kids.dta
drop if _merge==2
generate any_mh = _merge==3
drop _merge

keep if any_mh==1

*6: first MH event occurred no earlier than age 10 and no later than 18
generate old_enough = age_at_diagnosis>=120

keep if old_enough==1

*7a: seen for at least three months after initial MH event
preserve

use patientid months_after_diagnosis mbrs first_mh using full_data.dta, clear

*determine how long we see people after diagnosis
by patientid, s: egen max = max(months_after)
generate at_least_three = max>=3
keep if months_after>=0 & months_after<=3

*parse variables
rename mbrs zip

tempfile temp
save `temp', replace

restore

merge 1:m patientid using `temp'
drop if _merge==2
drop _merge

keep if at_least_three
drop at_least_three

*7b: all missing geo info in first three months

*convert to ZCTAs
preserve
cd "`network'"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
destring zip ZCTA, replace
keep zip ZCTA
tempfile temp
save `temp', replace
restore

merge m:1 zip using `temp'
drop if _merge==2

generate missing = ZCTA==.
by patientid, s: egen test = sum(missing)

generate missing_geo = test>0
drop zcta_kid
rename ZCTA zcta_kid

*7c: moved in first three months
by patientid zcta_kid, s: keep if _n==1
by patientid, s: generate zctas = _N

generate not_move = zctas==1
keep if not_move
drop _merge-not_move

generate count = 1
by patientid, s: egen test = sum(count)
keep if test==1
drop count test

unique patientid

*7d: have valid prescribing info
generate year = yofd(first_mh)

*merge in supply market
tostring zcta_kid, format(%05.0f) replace
cd "`home'"
merge m:1 zcta_kid year using market-measures.dta
drop if _merge==2

generate supplier_available = _merge==3
keep if supplier_available

*parse variables
cd "`home'"
keep patientid
save final_final.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #2
*SECTION B: diagnoses_insurer_population.dta
*******************************************************************

*bring in files
cd "`home'"
import delimited c2a_all.txt, varnames(1) clear
save c2a_all.dta, replace

import delimited c2b_all.txt, varnames(1) clear
save c2b_all.dta, replace

import delimited c2d_all.txt, varnames(1) clear
save c2d_all.dta, replace

import delimited c3a_all_collapse.txt, varnames(1) clear
save c3a_all_collapse.dta, replace

import delimited c3b_all_collapse.txt, varnames(1) clear
save c3b_all_collapse.dta, replace

*create huge file with all facility
use facility_id patientid admtng* princ_icd* using c2a_all.dta, clear
merge 1:m facility_id using c2b_all.dta
drop _merge

merge m:1 facility_id using c2d_all.dta
drop _merge

rename sec_icd9_diag_cd* icd9_*
rename sec_icd10_diag_cd* icd10_*
rename princ_icd9 icd9_0
rename princ_icd10 icd10_0
rename admtng_diag_cd icd9_11
rename admtng_icd10_diag_cd icd10_25

drop facil
order icd9_* icd10_*, sequential
order patientid, first

tempfile temp
save `temp', replace

*create huge file with all professional
use c3a_all_collapse.dta, clear

*recall: when we collapsed data in SQL, we assigned ICD9 variable name to both
*	ICD9 and ICD10, so need to parse that variable into two separate
rename princ_icd9_diag_cd princ_diag
generate princ_icd9 = ""
replace princ_icd9 = princ_diag if icd==9
generate princ_icd10 = ""
replace princ_icd10 = princ_diag if icd==10
drop princ_diag

tempfile temp2
save `temp2', replace

*create huge file with all professional
use c3b_all_collapse.dta, clear

*recall: when we collapsed data in SQL, we assigned ICD9 variable name to both
*	ICD9 and ICD10, so need to parse that variable into two separate
rename sec_icd9_diag_cd1 sec_diag
generate sec_icd9 = ""
replace sec_icd9 = sec_diag if icd==9
generate sec_icd10 = ""
replace sec_icd10 = sec_diag if icd==10
drop sec_diag icd 

tempfile temp3
save `temp3', replace

*combine two diagnosis code files (ICD9 files)
use `temp', clear
keep patientid *icd9*
by patientid, s: generate case = _n
reshape long icd9, i(patientid case) j(num) str
drop case num
replace icd9 = subinstr(icd9," ","",.)
drop if icd9==""
by patientid icd9, s: keep if _n==1

*save here.dta
rename icd9 princ_icd9

append using `temp2'
drop princ_icd10

rename princ_icd9 sec_icd9
append using `temp3'
drop *icd10*
rename sec_icd9 icd9

by patientid icd9, s: keep if _n==1
save icd9_all.dta

*combine two diagnosis code files (ICD10 files)
use `temp', clear
keep patientid *icd10*
by patientid, s: generate case = _n
reshape long icd10, i(patientid case) j(num) str
drop case num
replace icd10 = subinstr(icd10," ","",.)
drop if icd10==""
by patientid icd10, s: keep if _n==1

*save here.dta
rename icd10 princ_icd10
 
append using `temp2'
drop princ_icd9

rename princ_icd10 sec_icd10
append using `temp3'
drop *icd9*
rename sec_icd10 icd10

by patientid icd10, s: keep if _n==1
save icd10_all.dta

cd "`home'"

*clean ICD9
use icd9_all.dta, clear

*replace the icd9 variable with trimmed version
replace icd9 = trim(icd9)
replace icd9 = "" if icd9=="NA"
drop if icd9==""

*generate self-harm indicator (recall no suicide)
generate ideation =  regexm(icd9,"V62.84")
generate harm = regexm(icd9,"^E95[0-9]+[.]*")
generate self_harm = ideation+harm>0
keep patientid icd9 self_harm

*generate depression indicator
generate depression = regexm(icd9,"296.[2-3]+[0-9]*") | icd9=="296.99" | icd9=="300.4" | icd9=="625.4" | icd9=="298.83" | icd9=="311"
generate anxiety = icd9=="309.21" | icd9=="313.23" | icd9=="300.29" | icd9=="300.23" | icd9=="300.01" | icd9=="300.22" | icd9=="300.02" | icd9=="293.84" | icd9=="300.09" | icd9=="300.00"
generate adjustment_anxiety = icd9=="309.24"
generate adjustment_depression = icd9=="309.0" | icd9=="309.1"
generate adjustment_dax = icd9=="309.28"

*flag the V40.0 codes--hard code to number so retained
replace icd9 = "0" if icd9=="V40.0"

rename icd9 code
replace code="" if real(code)==.
destring code, replace
merge m:1 code using icd9_mh.dta
generate mh = _merge==3
drop if _merge==2
drop _merge

generate inrange = code>=290 & code<320 | code==0 | code==330.8
generate codef = floor(code)

*hard code neurodevelopmental
generate nd = codef==314 | codef==312 | codef==313 | codef==309 | codef==307 | codef==299 | codef==330 | codef==315 | codef==316 | codef==0
*get rid of alexia, tension headache, and any tagged MH
replace nd = 0 if code==315.01
replace nd = 0 if code==307.81 | code==307.89
replace nd = 0 if mh

generate adhd = codef==314 & nd==1

collapse (sum) nd adhd self_harm depression-adjustment_dax, by(patientid)

*hard code to 1 (could be above)
replace nd=1 if nd>1
replace adhd=1 if adhd>1
replace self_harm = 1 if self_harm>1
replace depression = 1 if depression>1
replace anxiety = 1 if anxiety>1
replace adjustment_anxiety = 1 if adjustment_anxiety>1
replace adjustment_depression = 1 if adjustment_depression>1
replace adjustment_dax = 1 if adjustment_dax>1

*save file
save nd_9_all.dta, replace

*clean ICD10
use icd10_all.dta, clear

*generate self-harm indicator (recall no suicide)
generate ideation =  regexm(icd10,"R45.851")
generate attempt = regexm(icd10,"T14.91")
generate harm = regexm(icd10,"^T[3-7]+[0-9]*.[0-9]*[A]$") | regexm(icd10,"^X[7-8]+[0-9]*.[0-9]*[A]$") 
generate self_harm = ideation+attempt+harm>0
keep patientid icd10 self_harm

*flag dax
generate depression = regexm(icd10,"F3[2-3]+.*") | icd10=="F34.8" | icd10=="F34.1" | icd10=="F94.3" | icd10=="F06.31" | icd10=="F06.32" | icd10=="F06.34" | icd10=="F32.8" | icd10=="F32.9"
generate anxiety = icd10=="F93.0" | icd10=="F94.0" | regexm(icd10,"F40.2[0-9]*") | icd10=="F40.10" | icd10=="F41.0" | icd10=="F40.00" | icd10=="F41.1" | icd10=="F06.4" | icd10=="F41.8" | icd10=="F41.9" | icd10=="F41.3"
generate adjustment_anxiety = icd10=="F43.22"
generate adjustment_depression = icd10=="F43.21"
generate adjustment_dax = icd10=="F43.23"

*code in 
rename icd10 code
merge m:1 code using icd10d.dta
generate mh = i_mh==1
drop if _merge==2
drop _merge

generate inrange = substr(code,1,1)=="F"
generate nd = substr(code,1,2)=="F9" | substr(code,1,2)=="F8"
replace nd = 0 if mh
generate adhd = substr(code,1,3)=="F90"

collapse (sum) self_harm nd adhd depression-adjustment_dax, by(patientid)

*hard code to 1 (could be above)
replace nd =1 if nd>1
replace adhd=1 if adhd>1
replace self_harm=1 if self_harm>1
replace depression = 1 if depression>1
replace anxiety = 1 if anxiety>1
replace adjustment_anxiety = 1 if adjustment_anxiety>1
replace adjustment_depression = 1 if adjustment_depression>1
replace adjustment_dax = 1 if adjustment_dax>1

*save file
save nd_10_all.dta, replace

*combine two files
use nd_9_all.dta, clear
append using nd_10_all.dta
collapse (sum) nd adhd self_harm depression-adjustment_dax, by(patientid)
replace nd =1 if nd>1
replace adhd=1 if adhd>1
replace self_harm=1 if self_harm>1
foreach var of varlist depression-adjustment_dax {
	replace `var' = 1 if `var'>1
}

*save file
cd "`home'"
save diagnoses_insurer_population.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #3
*SECTION C: bad_prescribing2.dta
*******************************************************************

*bring in data
cd "`home'"
import delimited ccc_pull_mhdrugs.txt, varnames(1) clear

*merge in REVISED DIAGNOSIS INFO
cd "`home'"
merge m:1 patientid using diagnosis_revised2.dta
drop if _merge==2 | exclude==1
keep if ada==1
drop _merge exclude

*format date
generate date = date(serv_dt,"YMD")
format %td date
drop serv_dt

*fix up product code
tostring prod_serv_id_cd, gen(ndc11) format(%011.0f)
generate ndc9 = substr(ndc11,1,9)
drop ndc11 prod_serv_id_cd

*identify the drug
rename ndc9 ndc
preserve
import excel ndc_data_info.xlsx, firstrow clear
generate ndc = substr(NDC,1,9)
destring ndc, replace
by ndc, s: keep if _n==1
drop NDC
tempfile temp
save `temp', replace
restore

destring ndc, replace
merge m:1 ndc using `temp'
drop if _merge==2
order ndc DrugName-GPI_14_DESC, first
drop _merge

rename ndc ndc9
merge m:1 ndc9 using child_index_mini.dta
drop if _merge==2
drop _merge

*merge in child birthdate
preserve
cd "`home'"
import delimited ccc_pull_demographics.txt, varnames(1) clear
keep patientid bday
generate birth = date(bday, "YMD")
format birth %td
duplicates drop
tempfile birth
save `birth', replace
restore

*merge in the child IDs
preserve
cd "`main'"
use regression_data_final_jpe.dta, clear
keep patientid first_mh

merge 1:m patientid using `birth'
keep if _merge==3
drop _merge bday

tempfile mmis
save `mmis'
restore

merge m:1 patientid using `mmis'
keep if _merge==3
drop _merge

*flag first 3 month prescription
generate date_diff = mofd(date)-mofd(first_mh)
generate age_at_prescription = (mofd(date)-mofd(birth))/12

*save file to here
tempfile temp
save `temp', replace

*****************************************look at length of benzos/hydroxyzine
use `temp', clear

*look at first six months
keep if date_diff<=6

*grab only hydroxyzine and benzos
keep if regexm(GPI_08,"Hydroxyzine") | benzo

*type of drug
generate drug = "benzo"
replace drug = "hydroxyzine" if regexm(GPI_08,"Hydroxyzine")
collapse (sum) days_supply, by(drug patientid)
generate too_long = days_supply>90
collapse (sum) too_long, by(patientid)
replace too_long=1 if too_long>1

keep patientid too_long
duplicates drop

tempfile temp2
save `temp2', replace

*****************************************************************bin the drugs
*work on grey area and FDA approved
use `temp', clear

*keep only first three months of prescriptions
keep if date_diff<=3

*merge in the length variable
merge m:1 patientid using `temp2'
drop if _merge==2
drop _merge
replace too_long = 0 if too_long==.

*flag SSRI/SNRI prescriptions
*flag non-SSRIS
generate ssri = 0
foreach var in Citalopram Escitalopram Paroxetine Fluoxetine Sertraline Vortioxetine Vilazodone Nefazodone {
	replace ssri = 1 if regexm(GPI_08,"`var'")
}

*flag non-SNRIs
generate snri = 0 
foreach var in Desvenlafaxine Duloxetine Levomilnacipran Milnacipran Venlafaxine {
	replace snri = 1 if regexm(GPI_08,"`var'")
}

*recode age variable
replace age = floor(age)

*flag anxiety medications
generate anxiety_med = regexm(GPI_08,"Hydroxyzine") | benzo

***WORLD ONE: 
*suppose adjustment_depression = depression; adjustment with anxiety = anxiety

replace depression = 1 if adjustment_depression==1
replace anxiety = 1 if adjustment_anxiety==1

*create FDA-ok flag*************************************************
generate fda_ok = (depression==1 & ( (age>=12 & regexm(GPI_08,"Escitalopram")) | (age>=8 & regexm(GPI_08,"^Fluoxetine")))) | (anxiety==1 & ((age>=7 & regexm(GPI_08,"Duloxetine")) | (age>=12 & regexm(GPI_08,"Lorazepam") & !too_long) | (age>=12 & regexm(GPI_08,"Midazolam") & !too_long) | (regexm(GPI_08,"Hydroxyzine") & !too_long)))
generate fda_bad = fda_ok==0

*create grey area flag
generate grey_area = ((ssri | snri) & fda_bad) | (anxiety_med & fda_bad & !too_long & anxiety)

*generate red flag
generate red_flag = grey_area==0 & fda_ok==0

*parse variables
keep patientid fda_ok grey_area red_flag
collapse (sum) fda_ok grey_area red_flag, by(patientid)

*recode
foreach var of varlist fda_ok grey_area red_flag {
    replace `var' = 1 if `var'>1
}

replace fda_ok= 0 if red_flag
replace grey_area= 0 if red_flag
replace fda_ok = 0 if grey_area

*save file
cd "`main'"
save bad_prescribing2.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #4
*SECTION D: comp_costs.dta, comp_fac.dta
*******************************************************************

*bring in total costs
cd "`home'"
import delimited comp_costs.txt, varnames(1) clear

*reshape wide
reshape wide costs, i(patientid dyear) j(type) str
foreach var of varlist costs* {
replace `var' =0 if `var'==.
}

*adjust costs with CPI
*CPI-U-RS
rename dyear year
foreach var of varlist costs* {
replace `var' = `var'*1.096 if year==2012
replace `var' = `var'*1.080 if year==2013
replace `var' = `var'*1.062 if year==2014
replace `var' = `var'*1.060 if year==2015
replace `var' = `var'*1.046 if year==2016
replace `var' = `var'*1.024 if year==2017
}

generate total = costsfacility+costspharm+costsprof

collapse (sum) costs* total, by(patientid)

save comp_costs.dta, replace

*bring in facility usage
cd "`home'"
import delimited comp_hosp.txt, varnames(1) clear
generate date = date(dates,"YMD")
format date %td
drop dates

*start with hospital
preserve
keep if i_hospital==1
by patientid date, s: keep if _n==1
generate count_h = 1
collapse (sum) count_h, by(patientid)
tempfile hospital
save `hospital', replace
restore

keep if i_er==1
by patientid date, s: keep if _n==1
generate count_e = 1
collapse (sum) count_e, by(patientid)

merge 1:1 patientid using `hospital'
drop _merge

replace count_e = 0 if count_e==.
replace count_h = 0 if count_h==.

save comp_fac.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #5
*SECTION E: jpe_sample.dta
*******************************************************************

*bring in data
cd "`home'"
use full_data.dta, clear

*merge with final sample
merge m:1 patientid using final_final.dta
keep if _merge==3
drop _merge

unique patientid
*202,066 kids who are all seen before age 11 AND fit sample restrictions (no 2-year follow up)

*determine how long we see people after diagnosis
by patientid, s: egen max = max(months_after)
keep if max>=24
drop max
*97,394 are seen for at least 24 months afterwards

*get IDs for these kids
cd "`main'"
save jpe_data.dta, replace

by patientid, s: keep if _n==1
keep patientid
save jpe_sample.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #6
*SECTION F: jpe_facility_diagnoses.dta, jpe_prof_diagnoses.dta
*******************************************************************
 
*identify JPE sample
cd "`main'"
use all_summary_statistics.dta, clear
generate bad = tca+benzo+badfda>0
keep if in_sample==1 & jpe
keep mmi_id
tempfile mmi
save `mmi', replace
 
*facility (only difference here from clean_diag_proc_codes is that we retain even non-MH codes)
cd "`home'"
*reshape to wide
import delimited facility_b.txt, varnames(1) clear
by facility_id, s: generate n = _n
reshape wide sec_icd10_diag_cd, i(facility_id) j(n)
tempfile tempb
save `tempb', replace

import delimited facility_d.txt, varnames(1) clear
tempfile tempd
save `tempd'

import delimited facility_a.txt, varnames(1) clear
save facility_a.dta, replace

use facility_a.dta, clear
merge 1:1 facility_id using `tempb'
drop _merge

merge 1:1 facility_id using `tempd'
drop _merge

drop princ_poa_cd
order patientid date, first 

foreach var of varlist admtng_icd10_diag_cd- sec_icd9_diag_cd10 {
replace `var' = subinstr(`var'," ","",.)
replace `var'="" if `var'=="NA"
}

drop facil

rename sec_icd9_diag_cd* icd9_*
rename sec_icd10_diag_cd* icd10_*
rename princ_icd9 icd9_0
rename princ_icd10 icd10_0
rename admtng_diag_cd icd9_11
rename admtng_icd10_diag_cd icd10_25

drop patientid0
order _all, sequential
order patientid, first

merge m:1 patientid using first_mh.dta
drop if _merge==2
drop _merge
replace first_mh = mofd(first_mh)
generate diff = mofd(date)-first_mh

save diagnosis_facility.dta, replace

*work with facility data - select only JPE kids
cd "`home'"
use diagnosis_facility.dta, clear
merge m:1 mmi using `mmi'
keep if _merge==3
drop _merge
save jpe_facility_diagnoses.dta, replace

*professional files
import delimited prof_a.txt, varnames(1) clear
generate date = date(serv_end_dt,"YMD")
format date %td
drop serv_end patientid0

duplicates drop

tempfile temp
save `temp', replace

import delimited prof_b.txt, varnames(1) clear
by professional_id, s: keep if _n==1

merge 1:1 professional_id using `temp'
drop _merge

save prof.dta, replace

use prof.dta, clear

foreach var of varlist sec_icd9_diag_cd1-princ_icd10_diag_cd {
replace `var' = subinstr(`var'," ","",.)
replace `var'="" if `var'=="NA"
}

drop count pfo

rename sec_icd9_diag_cd* icd9_*
rename sec_icd10_diag_cd* icd10_*
rename princ_icd9 icd9_0
rename princ_icd10 icd10_0

order _all, sequential
drop _merge
order patientid, first

merge m:1 patientid using first_mh.dta
drop if _merge==2
drop _merge
replace first_mh = mofd(first_mh)
generate diff = mofd(date)-first_mh

save diagnosis_professional.dta, replace

*work with professional data
cd "`home'"
use diagnosis_professional.dta, clear
merge m:1 mmi using `mmi'
keep if _merge==3
drop _merge
save jpe_prof_diagnoses.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #7
*SECTION G: neurodevelopmental.dta
*******************************************************************

*work with facility data
cd "`home''"
use diagnosis_facility.dta, clear
keep if diff<=3

tempfile temp
save `temp', replace

keep patientid diff icd9*
by patientid diff, s: generate claim = _n
reshape long icd9_, i(patientid diff claim) j(i)
drop claim i
rename icd9_ icd9
drop if icd9==""
tempfile temp9
save `temp9', replace

use `temp', clear

keep patientid diff icd10*
by patientid diff, s: generate claim = _n
reshape long icd10_, i(patientid diff claim) j(i)
drop claim i
rename icd10_ icd10
drop if icd10==""
tempfile temp10
save `temp10', replace

*work with professional data
use diagnosis_professional.dta, clear
keep if diff<=3

tempfile temp2
save `temp2', replace

keep patientid diff icd9*
by patientid diff, s: generate claim = _n
reshape long icd9_, i(patientid diff claim) j(i)
drop claim i
rename icd9_ icd9
drop if icd9==""
tempfile temp92
save `temp92', replace

use `temp2', clear

keep patientid diff icd10*
by patientid diff, s: generate claim = _n
reshape long icd10_, i(patientid diff claim) j(i)
drop claim i
rename icd10_ icd10
drop if icd10==""
tempfile temp102
save `temp102', replace

*combine two diagnosis code files (ICD9 files)

use `temp9', clear
append using `temp92'

*flag the V40.0 codes--hard code to number so retained
replace icd9 = "0" if icd9=="V40.0"

rename icd9 code
replace code="" if real(code)==.
destring code, replace
merge m:1 code using icd9_mh.dta
generate mh = _merge==3
drop if _merge==2
drop _merge

generate inrange = code>=290 & code<320 | code==0 | code==330.8
generate codef = floor(code)

*hard code neurodevelopmental
generate nd = codef==314 | codef==312 | codef==313 | codef==309 | codef==307 | codef==299 | codef==330 | codef==315 | codef==316 | codef==0
*get rid of alexia, tension headache, and any tagged MH
replace nd = 0 if code==315.01
replace nd = 0 if code==307.81 | code==307.89
replace nd = 0 if mh

generate adhd = codef==314 & nd==1

collapse (sum) nd adhd, by(patientid)

*hard code to 1 (could be above)
replace nd=1 if nd>1
replace adhd=1 if adhd>1

*save file
save nd_9.dta, replace

*combine two diagnosis code files (ICD10 files)

use `temp10', clear
append using `temp102'

rename icd10 code
merge m:1 code using icd10d.dta
generate mh = i_mh==1
drop if _merge==2
drop _merge

generate inrange = substr(code,1,1)=="F"
generate nd = substr(code,1,2)=="F9" | substr(code,1,2)=="F8"
replace nd = 0 if mh
generate adhd = substr(code,1,3)=="F90"

collapse (sum) nd adhd, by(patientid)

*hard code to 1 (could be above)
replace nd =1 if nd>1
replace adhd=1 if adhd>1

*save file
save nd_10.dta, replace

*combine two files
use nd_9.dta, clear
append using nd_10.dta
collapse (sum) nd adhd, by(patientid)
replace nd =1 if nd>1
replace adhd=1 if adhd>1

merge 1:1 patientid using final_final.dta
drop if _merge==1
drop _merge
replace adhd=0 if adhd==.
replace nd=0 if nd==.

save neurodevelopmental.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #8
*SECTION H: diagnosis_revised_final.dta
*******************************************************************

cd "`home'"
use jpe_facility_diagnoses.dta, clear
append using jpe_prof_diagnoses.dta

*keep only first three months
keep if diff<=3

*keep only final sample
merge m:1 patientid using final_jpe_sample.dta
keep if _merge==3
drop _merge

*flag depression (based off of DSM-5)
generate depression = 0

foreach var of varlist icd9_0-icd9_11 {
replace depression = 1 if regexm(`var',"296.[2-3]+[0-9]*") | `var'=="296.99" | `var'=="300.4" | `var'=="625.4" | `var'=="298.83" | `var'=="311"
}
foreach var of varlist icd10_0-icd10_25 {
replace depression = 1 if regexm(`var',"F3[2-3]+.*") | `var'=="F34.8" | `var'=="F34.1" | `var'=="F94.3" | `var'=="F06.31" | `var'=="F06.32" | `var'=="F06.34" | `var'=="F32.8" | `var'=="F32.9"
}

*flag anxiety (based off of DSM-5) - I added F41.3 because must be new since DSM-5.
generate anxiety = 0 
foreach var of varlist icd9_0-icd9_11 {
replace anxiety = 1 if `var'=="309.21" | `var'=="313.23" | `var'=="300.29" | `var'=="300.23" | `var'=="300.01" | `var'=="300.22" | `var'=="300.02" | `var'=="293.84" | `var'=="300.09" | `var'=="300.00"
}

foreach var of varlist icd10_0-icd10_25 {
	replace anxiety = 1 if `var'=="F93.0" | `var'=="F94.0" | regexm(`var',"F40.2[0-9]*") | `var'=="F40.10" | `var'=="F41.0" | `var'=="F40.00" | `var'=="F41.1" | `var'=="F06.4" | `var'=="F41.8" | `var'=="F41.9" | `var'=="F41.3"
}

*flag adjustment with anxiety (based off of DSM-5)
generate adjustment_anxiety = 0
foreach var of varlist icd9_0-icd9_11 {
replace adjustment_anxiety = 1 if `var'=="309.24"
}
foreach var of varlist icd10_0-icd10_25 {
	replace adjustment_anxiety = 1 if `var'=="F43.22"
}

*flag adjustment with depression (based off of DSM-5)
generate adjustment_depression = 0
foreach var of varlist icd9_0-icd9_11 {
replace adjustment_depression = 1 if `var'=="309.0" | `var'=="309.1"
}
foreach var of varlist icd10_0-icd10_25 {
	replace adjustment_depression = 1 if `var'=="F43.21"
}

*flag adjustment with depression AND anxiety (based off of DSM-5)
generate adjustment_dax = 0
foreach var of varlist icd9_0-icd9_11 {
replace adjustment_dax = 1 if `var'=="309.28"
}
foreach var of varlist icd10_0-icd10_25 {
	replace adjustment_dax = 1 if `var'=="F43.23"
}

*just pull IDS for those who have
generate before = diff<0
collapse (sum) depression-adjustment_dax, by(before patientid)
generate problem = before*(depression+anxiety+adjustment_anxiety+adjustment_depression+adjustment_dax)
collapse (sum)  depression- adjustment_dax problem, by(patientid)
drop problem

foreach var of varlist depression-adjustment_dax {
	replace `var' = 1 if `var'>1
}

keep patientid depression-adjustment_dax

*generate ANY, combining adjustment
generate any_depression = depression + adjustment_depression + adjustment_dax>0
generate any_anxiety = anxiety + adjustment_anxiety + adjustment_dax>0
generate any_adjustment = adjustment_anxiety + adjustment_depression + adjustment_dax>0
generate any_multiple = any_depression + any_anxiety + any_adjustment>1

egen temp = rowtotal(depression-adjustment_dax)
generate only_depression = depression == 1 & temp==1
generate only_anxiety = anxiety == 1 & temp==1
generate only_adjustment = (adjustment_anxiety + adjustment_depression + adjustment_dax)==temp
generate formal_multiple = depression + anxiety==2

save diagnosis_revised_final.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #9
*SECTION I: hurting_yourself.dta
*******************************************************************

cd "`home'"
use jpe_facility_diagnoses.dta, clear

*look for suicide ideation
keep if diff<=24

*tag ideation
generate ideation = 0
foreach var of varlist icd9_0-icd9_11 {
replace ideation = 1 if regexm(`var',"V62.84")
}
foreach var of varlist icd10_0-icd10_25 {
replace ideation = 1 if regexm(`var',"R45.851")
}

*tag suicide attempt
generate attempt = 0
foreach var of varlist icd10_0-icd10_25 {
replace attempt = 1 if regexm(`var',"T14.91")
}

*tag self-harm
generate harm = 0
foreach var of varlist icd9_0-icd9_11 {
replace harm = 1 if regexm(`var',"^E95[0-9]+[.]*")
}
foreach var of varlist icd10_0-icd10_25 {
replace harm = 1 if regexm(`var',"^T[3-7]+[0-9]*.[0-9]*[A]$") | regexm(`var',"^X[7-8]+[0-9]*.[0-9]*[A]$") 
}

*generate a flag for hurting yourself
generate hurting_yourself = ideation+attempt+harm
replace hurting_yourself = 1 if hurting_yourself>0

keep patientid diff hurting_yourself

*flag timeframes
generate before = diff<0
generate first_three = diff>=0 & diff<=3
generate first_year = diff>=0 & diff<=12
generate second_year = diff>=0 & diff<=24

foreach time of varlist before-second_year {
	replace `time' = `time'*hurting_yourself
}
collapse (sum) before-second_year, by(patientid)

*save file
cd "`main'"
save hurting_yourself.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #10
*SECTION J: other_mh_diagnosis.dta
*Description: code for robustness check: want to exclude children with diagnoses for
*	psychosis, bipolar, or ADHD + conduct in initial three months
*******************************************************************

cd "`home'"
use jpe_facility_diagnoses.dta, clear
append using jpe_prof_diagnoses.dta

*keep only first three months
keep if diff>=0 & diff<=3

*keep only final sample
merge m:1 patientid using final_jpe_sample.dta
keep if _merge==3
drop _merge

*parse variables
keep patientid icd9* icd10* 
by patientid, s: generate id = _n

*reshape long
reshape long icd9_ icd10_, i(patientid id) j(number)
drop number id

*get rid of empty rows
replace icd10_="" if icd10_=="UN"
drop if icd10_=="" & icd9_==""

*save the data here
save all_diagnoses_jpe_kids_1st3m.dta, replace

cd "`home'"
use all_diagnoses_jpe_kids_1st3m.dta, clear

*schizophrenia spectrum and other psychotic
generate psychotic_disorder = 0
replace psychotic_disorder = 1 if regexm(icd9,"301.22") | regexm(icd9,"297.1") | regexm(icd9,"298.8") | regexm(icd9,"295.40") | regexm(icd9,"295.90") | regexm(icd9,"295.70") | regexm(icd9,"293.8[0-9]+") | regexm(icd10,"F21") | regexm(icd10,"F22") | regexm(icd10,"F23") | regexm(icd10,"F20.81") | regexm(icd10,"F20.9") | regexm(icd10,"F25.0") | regexm(icd10,"F25.1") | regexm(icd10,"F06.[012]+") | regexm(icd10,"F28") | regexm(icd10,"F29")

*generate bipolar disorders
generate bipolar = 0
replace bipolar = 1 if regexm(icd9,"296.4[0-9]+") | regexm(icd9,"296.5[0-9]+") | regexm(icd9,"296.7") | regexm(icd9,"296.89") | regexm(icd9,"301.13") | regexm(icd9,"293.89") | regexm(icd9,"296.83") | regexm(icd9,"296.89") | regexm(icd9,"296.80") | regexm(icd10,"F31.1[123]+") | regexm(icd10,"F31.7[123456]+") | regexm(icd10,"F31.9") | regexm(icd10,"F31.31") | regexm(icd10,"F31.32") | regexm(icd10,"F31.4") | regexm(icd10,"F31.5") | regexm(icd10,"F31.81") | regexm(icd10,"F34.0") | regexm(icd10,"F06.3[34]+") | regexm(icd10,"F31.89")

*generate autism + behavioral
generate autism = 0
replace autism = 1 if regexm(icd9,"299.00") | regexm(icd10,"F84")

*generate tourette's
generate tourettes = 0
replace tourettes = 1 if regexm(icd9,"307.23") | regexm(icd10,"F95.2")

*create flag for problematic diagnosis
egen problematic = rowtotal(psychotic-tourettes)
collapse (sum) problematic, by(patientid)
replace problematic = 1 if problematic>1

*save file
cd "`data'"
save other_mh_diagnosis.dta, replace

*******************************************************************
**# Bookmark #11
*SECTION K: market-measures.dta, network_table.dta
*******************************************************************
*******************************************************************

******************************************************************************
*SECTION A: define network of physicians who are serving a particular zip code
******************************************************************************
cd "`network'"

*bring in therapy visits
import delimited network_therapy.txt, varnames(1) clear

*recode therapy type
generate type = ""
replace type= "psychotherapy" if cpt==90832 | cpt==90834 | cpt==90837 | cpt==90833 | cpt==90836 | cpt==90838
replace type= "complex" if cpt==90785
replace type= "family" if cpt==90846 | cpt==90847 | cpt==90849
replace type= "group" if cpt==90853
replace type= "psychoanalysis" if cpt==90845
replace type= "psychopharmacology" if cpt==90862 | cpt==90863
replace type= "crisis" if cpt==90839 | cpt==90840
replace type= "diagnostic" if cpt==90791 | cpt==90792
drop cpt

generate date = date(serv_end_dt,"YMD")
format date %td
drop serv_

rename memb_clm zip
rename npi npi

save network_therapy.dta, replace

*bring in antidepressant scripts
import delimited network_ads.txt, varnames(1) clear
generate date = date(serv_dt,"YMD")
format date %td
rename npi npi
rename memb_clm zip
rename prod ndc
generate type="antidepressant"
drop serv_dt
save network_ads.dta, replace

*bring in mental health scripts
import delimited network_mhs.txt, varnames(1) clear
generate date = date(serv_dt,"YMD")
format date %td
rename npi npi
rename memb_clm zip
rename prod ndc
generate type="other MH"
drop serv_dt
save network_mhs.dta, replace

*append claims files; import MMI
use network_therapy.dta, clear
append using network_ads.dta
append using network_mhs.dta

preserve
import delimited network_ids_mmi.txt, varnames(1) clear
drop dates
duplicates drop
tempfile temp
save `temp', replace
restore

merge m:1 patientid0 using `temp'
drop _merge

*note: some people don't have master member ID's
tostring patientid0 patientid, replace

tempvar temp
generate `temp' = patientid
replace `temp' = "m"+`temp' if patientid!="."
replace `temp' = patientid0 if patientid=="."
egen id = group(`temp')
drop `temp'

*bring in location of provider
replace npi="" if real(npi)==.
destring npi, replace

*merge in the NPPES data
merge m:1 npi using npi_loc.dta
drop if _merge==2

replace prac_zip_code = "missing" if prac_zip_code==""
rename prac_zip_code doc_zip
drop *bus*
drop _merge

*create cells (note the nppes file has zip9 where available)
generate year = yofd(date)

*grab off the 5 digit zip code
generate length =length(doc_zip)
replace doc_zip = substr(doc_zip,1,5) if length==9
replace doc_zip = substr(doc_zip,1,5) if length==10
replace length = length(doc_zip)
replace doc_zip = "missing" if length!=5

*convert to ZCTAs
preserve
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
tempfile temp
save `temp', replace
restore

merge m:1 zip using `temp'
drop if _merge==2
replace zip = "missing" if _merge==1
rename ZCTA zcta_kid
replace zcta_kid = "missing" if zcta_kid==""
drop zip length-ZIP_TYPE zip_join _merge

rename doc_zip zip
merge m:1 zip using `temp'
drop if _merge==2
replace ZCTA = "missing" if _merge==1
rename ZCTA zcta_doc
drop _merge
drop PO_NAME-ZIP_TYPE zip_join_type
drop zip

*safe file here
save network_full.dta, replace

*bring in file
use network_full.dta, clear

*get rid of any claims with missing geographic info
drop if zcta_kid=="missing" | zcta_doc=="missing"

*bring in the zip code circle file
rename zcta_kid zip1
rename zcta_doc zip2
merge m:1 zip1 zip2 using gaz2016zcta5distance50miles.dta
drop if _merge==2
replace _merge=3 if zip1==zip2 

*flag any zips more than 50 miles apart
generate more50 = _merge==1
drop _merge

*replace distance to zero if same zcta's
replace mi_to_zcta5 = 0 if zip1==zip2

*rename variables
rename zip1 zcta_kid
rename zip2 zcta_doc

*combine all services into "TREATMENT" region
generate i = 1
collapse (sum) i, by(id zcta_kid zcta_doc mi_to_zcta5 more50)
rename i visits
generate i = 1
by id zcta_kid, s: generate flag = _n==1
preserve
collapse (sum) flag, by(zcta_kid)
rename flag N_kids
tempfile temp
save `temp', replace
restore
collapse (sum) visits i, by(zcta_kid zcta_doc mi_to_zcta5 more50)
rename i kids
merge m:1 zcta_kid using `temp'
drop _merge

*fix up data
order zcta_kid zcta_doc N_kids kids visits mi_to_zcta5 more50

*keep any ZCTA with at least 10 kids
keep if N_kids>=10

*grab the top 10 zip code regions visit
replace kids = -kids
sort zcta_kid kids, stable
by zcta_kid: generate counter=_n
replace kids = -kids

*keep the most popular 10 ZCTAs
keep if counter<=10

*reshape wide
drop more50
rename mi miles
reshape wide zcta_doc kids visits miles, i(zcta_kid) j(counter)
order N_kids, after(zcta_kid)

*save file
save network_table.dta, replace

*SECTION B: physician measures

*The file has a record for each doc, year, patient, by MH/not between 10-18.
cd "`home'"
*The file has a record for each doc, year, patient (patientid0/patientid), by MH/not between 10-18.
import delimited ccc_physician_measure4b.txt, varnames(1) clear
drop v7

*get rid of non-numeric NPIs
drop if real(npi)==.
destring npi, replace
rename npi npi
format npi %010.0f

*generate ID variable (applying patientid if available)
tostring patientid0 patientid, replace
generate id = patientid
replace id = "n"+patientid0 if patientid=="."

*collapse into doc-patient-service year records
collapse (sum) non_mh_claims mh_claims, by(script_year npi id)

*flag if mental health drug recipient (NOTE DOING THIS BECAUSE LABELED BACKWARDS in the SQL file!)
generate mh_patient = non_mh_claims>0
generate patients = 1

*count up number of mental health patients in doc portfolio in year
collapse (sum) mh_patient patients, by(script_year npi)

*generate share variable
generate share_mh = mh_patient/patients

*save file
destring script_year, replace
cd "`main'"
save share_mh_in_drug_portfolio.dta, replace

*physicians treating MH in kids without drugs
cd "`home'"
*The file has a row for each doc, service year, and age bracket, where the "patients" variable indicates the presence of claims in that age bracket.
import delimited ccc_physician_measure7c.txt, varnames(1) clear

*get rid of non-numeric NPIs
drop if real(npi)==.
destring npi, replace
rename npi npi
format npi %010.0f

*reshape wide
by npi service_year age_bracket, s: generate flag = _N
drop if flag>1
drop flag
reshape wide patients, i(npi service_year) j(age_bracket) str

foreach var of varlist patientsadult patientschild {
replace `var' = 0 if `var'==.
}

cd "`main'"
save nondrugtreatment_of_mh.dta, replace

*prescribing within insurer data
*this file has a row for each doc, year, age, patient, and ndc code where there was such a patient
*	includes only mental health drugs
cd "`home'"
import delimited ccc_physician_measure1e.txt, varnames(1) clear

*fix up product code
tostring prod_serv_id_cd, gen(ndc11) format(%011.0f)
generate ndc9 = substr(ndc11,1,9)
drop ndc11 prod_serv_id_cd
destring ndc9, replace
merge m:1 ndc9 using child_index_mini.dta
keep if _merge==3
drop _merge

*keep only mental health records
keep if mh

*did more accurate age coding for bad prescribing, hence why we pull 9 year olds here,
*	but not before. hard code the 9's to 10 to be consistent
destring age, replace
replace age = 10 if age==9

*keep "bad" with tca and benzo
by npi script_year new_id, s: keep if _n==1
generate id = 1

*calculate share of patients
collapse (sum) id, by(npi script_year)

*calculate the kids in portfolio 
rename id patients
rename npi npi

*parse variables
keep npi script_year patients

*fix up NPI
drop if real(npi)==.
destring npi, replace

*save file
cd "`main'"
save insurer_prescribing.dta, replace

*insurer-based bad prescribing
cd "`main'"
use insurer_prescribing.dta, clear
rename patients patients_mhdrug

*merge insurer-based share of drug portfolio getting MH drugs
merge 1:1 npi script_year using share_mh_in_drug_portfolio.dta
*note: mh_patient = patients_mhdrug
drop _merge

*merge insurer-based patient load for non-drug treatment
rename script_year service_year
preserve
use nondrugtreatment_of_mh.dta, clear
keep if patientschild>0
keep npi service_year patientschild
tempfile temp
save `temp', replace
restore

merge 1:1 npi service_year using `temp'
drop _merge

*get specialty
cd "`network'"
merge m:1 npi using nppes_102018.dta
keep if _merge==3
drop _merge
*here, we lose 10,104 provider-year observations

*label taxonomy

*psychiatrists
generate psych = 0
forvalues i = 1(1)9 {
replace psych = 1 if healthcareprovidertaxonomycode_`i'=="2084P0800X" | healthcareprovidertaxonomycode_`i'=="2084P0804X"
}

*GPs
generate gp = 0 
forvalues i = 1(1)9 {
replace gp = 1 if healthcareprovidertaxonomycode_`i'=="208000000X" | healthcareprovidertaxonomycode_`i'=="2080A0000X" | healthcareprovidertaxonomycode_`i'=="2080P0006X" | healthcareprovidertaxonomycode_`i'=="207Q00000X" | healthcareprovidertaxonomycode_`i'=="207QA0000X" | healthcareprovidertaxonomycode_`i'=="208D00000X" 
}

*therapists
generate therapist = 0 
generate np = 0
forvalues i = 1(1)9 {
di `i'
replace therapist = 1 if healthcareprovidertaxonomycode_`i'=="1041C0700X" | healthcareprovidertaxonomycode_`i'=="101YM0800X" | healthcareprovidertaxonomycode_`i'=="101YP2500X" | healthcareprovidertaxonomycode_`i'=="103TC0700X" | healthcareprovidertaxonomycode_`i'=="103T00000X" | healthcareprovidertaxonomycode_`i'=="2084P0800X" | healthcareprovidertaxonomycode_`i'=="106H00000X" | healthcareprovidertaxonomycode_`i'=="101Y00000X" | healthcareprovidertaxonomycode_`i'=="104100000X" | healthcareprovidertaxonomycode_`i'=="2084P0804X" | healthcareprovidertaxonomycode_`i'=="363LP0808X" | healthcareprovidertaxonomycode_`i'=="103TC2200X"
replace np = 1 if healthcareprovidertaxonomycode_`i'=="363LP0808X"
}
replace therapist = 0 if psych==1

drop healthcareprovidertaxonomycode_1-healthcareprovidertaxonomycode_9

generate other = psych+gp+therapist==0

*merge in doc location
cd "`network'"
merge m:1 npi using npi_loc.dta
drop if _merge==2
drop _merge bus

*fix up zip
rename zip zip_molly
rename prac zip
replace zip = substr(zip,1,5)

*convert to ZCTAs
preserve
cd "`network'"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
destring zip ZCTA, replace
keep zip ZCTA
tempfile temp
save `temp', replace
restore

drop if real(zip)==.
destring zip, replace
merge m:1 zip using `temp'
keep if _merge==3
drop _merge
rename ZCTA zcta_doc

*save data
cd "`main'"
save physician-information_jpe.dta, replace

*calculate instruments
*get market-level physician measures

*bring in data
cd "`main'"
use physician-information_jpe.dta, clear

*reset everything to zero if missing
foreach var of varlist patients_mhdrug- other {
replace `var' = 0 if `var'==.
}

*instruments 6-8
*#gp, therapist, psych treating kids per 1k kids
*not specific to MH treatment

preserve

*grab anyone with kid in their portfolio (not necessarily MH treatment-specific)
keep if patientschild+patients>0

*keep gps, therapists, psychiatrists
keep if gp+therapist+psych>0

*collapse into counts by ZCTA
collapse (sum) psych gp therapist, by(year zcta_doc)

*save file
tempfile temp1
save `temp1', replace

restore

*instrument 3 (share_mh)
*share GP portfolio receiving mh drugs, weight by insurer patient load

preserve

*grab GPs
keep if gp==1

*grab any whose pharmacy claims to kids we see
keep if patients>0

*collapse into zcta mean share, weighting by patient load
collapse (mean) share_mh [w=patients], by(zcta_doc year)

*save file
tempfile temp2
save `temp2', replace

restore

*instruments 1 and 2 (share_psych, share_gp)
*share of supply of those treating kids, which share are each specialty

preserve

*grab anyone with kid in their portfolio
keep if patientschild+patients_mhdrug>0

*keep all psych, therapist, and anyone writing at least one MH
keep if psych==1 | therapist==1 | patients_mhdrug>0

*generate count
collapse (sum) psych therapist gp other, by(zcta_doc year)

*generate total supply
generate mh_supply = psych+therapist+gp+other
rename gp n_gp
rename psych n_psych

*save file
keep year zcta_doc mh_supply n_gp n_psych

tempfile temp3
save `temp3', replace

restore

*pull together measures
use `temp1', clear
merge 1:1 year zcta_doc using `temp2'
drop _merge
merge 1:1 year zcta_doc using `temp3'
drop _merge

*recode zeros
foreach var of varlist psych-mh_supply {
replace `var'=0 if `var'==.
}

*bring in kid count
rename zcta_doc zcta_kid
cd "`network'"
merge 1:1 year zcta_kid using kids_per_zcta.dta
rename _merge _merge_kidpopn
drop _merge

*recode zeros
foreach var of varlist psych-children {
replace `var'=0 if `var'==.
}

rename zcta zcta

*save zcta file
cd "`main'"
save zcta-stats.dta, replace

*convert into market-level measures

*expand into yearly records
cd "`network'"
use network_table.dta, clear

expand 7
by zcta_kid, s: generate year = 2012+_n-1

reshape long zcta_doc kids visits miles, i(zcta_kid year) j(num)

*get rid of any invalid zip code records
drop if zcta_doc==""
drop miles num

*merge in info
destring zcta_doc, replace
rename zcta_doc zcta
cd "`main'"
merge m:1 zcta year using zcta-stats.dta
keep if _merge==3
drop _merge

*collapse sum
collapse (rawsum) psych gp therapist mh_supply n_psych n_gp children (mean) share_mh [w=visits], by(zcta_kid year)

foreach var of varlist psych-therapist {
generate `var'_pm = (`var'/children)*1000000
}

generate share_psych = n_psych/mh_supply
generate share_gp = n_gp/mh_supply

*parse variables
keep zcta_kid year psych- therapist share_mh- share_gp

*by kid, prescribing measures
cd "`main'"
save market-measures.dta, replace

*instrument 4 (s1_by_psych)
cd "`main'"
use all_instrZ_noPAT.dta, clear

*get specialty
cd "`network'"
merge m:1 npi using nppes_102018.dta
keep if _merge==3
drop _merge
*here, we lose 10,104 provider-year observations

*label taxonomy

*psychiatrists
generate psych = 0
forvalues i = 1(1)9 {
replace psych = 1 if healthcareprovidertaxonomycode_`i'=="2084P0800X" | healthcareprovidertaxonomycode_`i'=="2084P0804X"
}

*GPs
generate gp = 0 
forvalues i = 1(1)9 {
replace gp = 1 if healthcareprovidertaxonomycode_`i'=="208000000X" | healthcareprovidertaxonomycode_`i'=="2080A0000X" | healthcareprovidertaxonomycode_`i'=="2080P0006X" | healthcareprovidertaxonomycode_`i'=="207Q00000X" | healthcareprovidertaxonomycode_`i'=="207QA0000X" | healthcareprovidertaxonomycode_`i'=="208D00000X" 
}

*therapists
generate therapist = 0 
generate np = 0
forvalues i = 1(1)9 {
di `i'
replace therapist = 1 if healthcareprovidertaxonomycode_`i'=="1041C0700X" | healthcareprovidertaxonomycode_`i'=="101YM0800X" | healthcareprovidertaxonomycode_`i'=="101YP2500X" | healthcareprovidertaxonomycode_`i'=="103TC0700X" | healthcareprovidertaxonomycode_`i'=="103T00000X" | healthcareprovidertaxonomycode_`i'=="2084P0800X" | healthcareprovidertaxonomycode_`i'=="106H00000X" | healthcareprovidertaxonomycode_`i'=="101Y00000X" | healthcareprovidertaxonomycode_`i'=="104100000X" | healthcareprovidertaxonomycode_`i'=="2084P0804X" | healthcareprovidertaxonomycode_`i'=="363LP0808X" | healthcareprovidertaxonomycode_`i'=="103TC2200X"
replace np = 1 if healthcareprovidertaxonomycode_`i'=="363LP0808X"
}
replace therapist = 0 if psych==1

generate count = 1
collapse (sum) psych count, by(PAT_ZIP3 year)

merge 1:1 year PAT_ZIP3 using n_mh_child_counts.dta
drop _merge

generate s1_by_psych = psych/tn_children

preserve

use instrZ.dta, clear

generate s1_by_psych_alt = psych/count
drop psych count

tempfile temp
save `temp', replace

restore

merge 1:1 PAT_ZIP3 year using `temp'
drop _merge

keep PAT_ZIP3 year s1*

save first_script_by.dta, replace

use market-measures.dta, clear

generate PAT_ZIP3 = substr(zcta_kid, 1, 3)
destring PAT_ZIP3, replace

merge m:1 PAT_ZIP3 year using first_script_by.dta
keep if _merge==3
drop _merge

cd "`home'"
save market-measures.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #12
*SECTION L: market-measures-antipsych.dta
*******************************************************************
*******************************************************************

*instrument 5 (share_antipsych)
**clean MMI information
cd "`data'"
import delimited jpe_bp_kids_wmmi2.txt, varnames(1) clear
save jpe_bp_kids_wmmi.dta, replace

**clean coverage information
import delimited jpe_bp_kidscov_postdiag2.txt, varnames(1) clear

*fix dates
generate begin = date(begin_dt,"YMD", 2020)
generate earliest2 = date(earliest,"YMD", 2020)
format begin earliest2 %td
drop begin_dt earliest
rename earliest2 earliest
*flag if carved-out pharmacy coverage
generate carved_out = phrmcy_carve_out_subm_ind=="C" | phrmcy_carve_out_subm_ind=="I" | phrmcy_carve_out_subm_ind=="X" | phrmcy_carve_out_subm_ind=="Y"
drop phrmcy
*get rid of anyone with carveouts
by patientid, s: egen ever_carve = sum(carved_out)
drop if carved_out>0
drop carved_out ever_carve
*keep coverage post earliest diagnosis
replace begin = mofd(begin)
format begin %tm
rename begin date
*get rid of duplicates
duplicates drop
*fix variables
rename mbrs zip
*save file
save clean_jpe_bp_kidscov_postdiag2.dta, replace

**clean pharmacy information
use clean_jpe_bp_kidscov_postdiag2.dta, clear
keep patientid0
by patientid0, s: keep if _n==1
save all_dax_kids.dta, replace

import delimited jpe_bp_pharm.txt, varnames(1) clear
merge m:1 patientid0 using all_dax_kids.dta
keep if _merge==3
drop _merge

*fix up product code
tostring prod_serv_id_cd, gen(ndc11) format(%011.0f)
generate ndc9 = substr(ndc11,1,9)
drop ndc11 prod_serv_id_cd
*id active ingredient
destring ndc9, replace
cd "`network'"
merge m:1 ndc9 using child_index_mini.dta
keep if _merge==3
drop _merge
drop opioid
drop if active==""

*flag antipsychotics
generate antipsychotic = 0
foreach ap in aripiprazole carbamazepine chlorpromazine clozapine divalproex fluphenazine haloperidol lamotrigine lithium loxapine olanzapine "olanzapine-fluoxetine" paliperidone perphenazine "perphenazine-amitriptyline" prochlorperazine quetiapine risperidone thioridazine thiothixene trifluoperazine ziprasidone {
	replace antipsychotic = 1 if regexm(active,"`ap'")
}

keep patientid0 serv_dt antipsychotic
generate date = mofd(date(serv_dt,"YMD", 2020))
format date %tm
drop serv_dt
collapse (sum) antipsychotic, by(patientid0 date)
replace antipsychotic = 1 if antipsychotic>1
keep if antipsychotic

*merge in MMI
merge m:1 patientid0 using jpe_bp_kids_wmmi.dta
drop if _merge==2
drop _merge

*get the patientid
save jpe_bp_pharm.dta, replace

*merge coverage and antipsych files
use clean_jpe_bp_kidscov_postdiag2.dta, clear
merge m:1 patientid0 date using jpe_bp_pharm.dta
replace antipsychotic = 0 if antipsychotic==.

by patientid, s: egen diagnosed = mean(earliest)
format diagnosed %td
drop earliest
*these are hang records where no coverage record exists
drop if _merge==2
drop _merge

*convert to annual
replace date = yofd(dofm(date))
format date %ty
collapse (sum) antipsychotic, by(patientid patientid0 zip date diagnosed)
replace antipsychotic = 1 if antipsychotic>1

*save the file
save all_dax_kids.dta, replace

*collapse into mmi/date/antipsychotic/zip
collapse (sum) antipsychotic, by(patientid zip date diagnosed)
replace antipsychotic = 1 if antipsychotic>1

*convert to ZCTAs
preserve
cd "`network'"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
destring zip ZCTA, replace
keep zip ZCTA
tempfile temp
save `temp', replace
restore

drop if real(zip)==.
destring zip, replace
merge m:1 zip using `temp'
keep if _merge==3
drop _merge
rename ZCTA zcta_kid

collapse (sum) antipsychotic, by(patientid date zcta_kid)
replace antipsychotic=1 if antipsychotic>1

generate child = 1
collapse (sum) child antipsychotic, by(zcta_kid date)
rename date year

*calculate share antipsychotic
generate share_antipsychotic = antipsychotic/child
generate share_anti_yes = (antipsychotic-1)/(child-1)
generate share_anti_no = antipsychotic/(child-1)

*parse variables
keep year zcta_kid share*

*save the file
save share_antipsychotic.dta, replace

*convert into market-level measures

*expand into yearly records
cd "`network'"
use network_table.dta, clear

expand 7
by zcta_kid, s: generate year = 2012+_n-1
keep zcta* year visits*

reshape long zcta_doc visits, i(zcta_kid year) j(num)

*get rid of any invalid zip code records
drop if zcta_doc==""

*merge in info
destring zcta_doc, replace
rename zcta_kid use_later
rename zcta_doc zcta_kid
cd "`data'"
merge m:1 zcta year using share_antipsychotic.dta
keep if _merge==3
drop _merge

*collapse sum
foreach var of varlist share_antipsychotic share_anti_yes share_anti_no {
	replace `var' = 0 if `var'==.
}
destring use_later, replace
replace share_anti_yes = share_antipsychotic if use_later!=zcta_kid
replace share_anti_no = share_antipsychotic if use_later!=zcta_kid

collapse (mean) share_antipsych share_anti_yes share_anti_no [w=visits], by(use_later year)

*by kid, prescribing measures
cd "`home'"
rename use_later zcta_kid
save market-measures-antipsych.dta, replace

*******************************************************************
*******************************************************************
**# Bookmark #13
*SECTION M: status_of_leave_out.dta
*******************************************************************
*******************************************************************

**clean MMI information
*cd "`data'"
*import delimited f5.txt, varnames(1) clear
*save f5.dta, replace

cd "`data'"
use f5.dta, clear
cd "`home'"
merge m:1 patientid using final_jpe_sample.dta
keep if _merge==3
drop _merge

rename patientid patientid_old

cd "`data'"
merge 1:m patientid0 using all_dax_kids.dta
keep if _merge==3
drop _merge

rename zcta_kid zcta_kid_regression

*convert to ZCTAs
preserve
cd "`network''"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
destring zip ZCTA, replace
keep zip ZCTA
tempfile temp
save `temp', replace
restore

drop if real(zip)==.
destring zip, replace
merge m:1 zip using `temp'
keep if _merge==3
drop _merge
rename ZCTA zcta_kid

destring zcta_kid_regression, replace

*keep the years in regression
keep if year==date
keep if zcta_kid_regression==zcta_kid

collapse (sum) antipsychotic, by(year zcta_kid patientid_old)
replace antipsychotic = 1 if antipsychotic>1
rename patientid_old patientid

*save status of leaveout
save status_of_leave_out.dta, replace
